library(tidyverse)
# Source: https://legacy.baseballprospectus.com/compensation/?team=WAS
salary_2019 <- read_csv("Salary_2019.csv")
# Source: https://www.spotrac.com/mlb/washington-nationals/payroll/2023/
salary_2023 <- read_csv("Salary_2023.csv")
# Looking at position and salary column
salary_2019 <- salary_2019 %>% dplyr::select("Pos","Salary") %>% na.omit()
salary_2023 <- salary_2023 %>% dplyr::select("POS.","BASE SALARY") %>% na.omit()
# check positions in each dataset
unique(salary_2019$Pos) [1] "SP" "3B" "1B" "2B" "RF" "C" "RP" "SS" "CF" "LF" "DNP" "PH"
unique(salary_2023$`POS.`) [1] "SP" "RP/CL" "RF" "1B" "RP" "C" "2B" "SS" "LF"
[10] "3B" "OF" "CF"
# replace the 2023 "RP/CL" to "RP", change the dollar format into value format
salary_2023 <- salary_2023 %>% mutate(Pos = ifelse(`POS.`!="RP/CL",`POS.`,"RP")) %>% mutate(Salary_2023=as.numeric(gsub("\\$", "", gsub(",", "", `BASE SALARY`)))) %>% dplyr::select(Pos, Salary_2023)
# change the 2019 data dollar format into value format
salary_2019 <- salary_2019 %>% mutate(Salary_2019=as.numeric(gsub("\\$", "", gsub(",", "", Salary))))
# Group by position
salary_2023_1 <- salary_2023 %>% group_by(Pos) %>% summarize(salary_sum_2023=sum(as.numeric(Salary_2023)))
# calculate total
salary_2023_1$total_2023 <- sum(salary_2023_1$salary_sum_2023)
# calculate percentage
salary_2023_1$percentage_2023 <- salary_2023_1$salary_sum_2023/salary_2023_1$total_2023
# Group by position
salary_2019_1 <- salary_2019 %>% group_by(Pos) %>% summarize(salary_sum_2019=sum(as.numeric(Salary_2019)))
# calculate total
salary_2019_1$total_2019 <- sum(salary_2019_1$salary_sum_2019)
# calculate percentage
salary_2019_1$percentage_2019 <- salary_2019_1$salary_sum_2019/salary_2019_1$total_2019